<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.4.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/myblog/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/myblog/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/myblog/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/myblog/images/logo.svg" color="#222">

<link rel="stylesheet" href="/myblog/css/main.css">


<link rel="stylesheet" href="/myblog/lib/font-awesome/css/all.min.css">
  <link rel="stylesheet" href="/myblog/lib/pace/pace-theme-minimal.min.css">
  <script src="/myblog/lib/pace/pace.min.js"></script>

<script id="hexo-configurations">
    var NexT = window.NexT || {};
    var CONFIG = {"hostname":"gao-qianwei.gitee.io","root":"/myblog/","scheme":"Gemini","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":false,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":false},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":true,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},"path":"search.xml"};
  </script>

  <meta name="description" content="一、WordCount 案例实操1）需求&amp;ensp;&amp;ensp;&amp;ensp;&amp;ensp;在给定的文本文件中统计输出每一个单词出现的总次数">
<meta property="og:type" content="article">
<meta property="og:title" content="MapReduce环境配置与案例实操">
<meta property="og:url" content="https://gao-qianwei.gitee.io/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/index.html">
<meta property="og:site_name" content="学无止境">
<meta property="og:description" content="一、WordCount 案例实操1）需求&amp;ensp;&amp;ensp;&amp;ensp;&amp;ensp;在给定的文本文件中统计输出每一个单词出现的总次数">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://gao-qianwei.gitee.io/myblog/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/1.png">
<meta property="og:image" content="https://gao-qianwei.gitee.io/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/2.png">
<meta property="article:published_time" content="2021-06-06T10:17:17.707Z">
<meta property="article:modified_time" content="2021-06-06T10:21:43.676Z">
<meta property="article:author" content="高乾威">
<meta property="article:tag" content="数据研发">
<meta property="article:tag" content="Hadoop">
<meta property="article:tag" content="MapReduce">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://gao-qianwei.gitee.io/myblog/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/1.png">

<link rel="canonical" href="https://gao-qianwei.gitee.io/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/">


<script id="page-configurations">
  // https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh-CN'
  };
</script>

  <title>MapReduce环境配置与案例实操 | 学无止境</title>
  






  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/myblog/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">学无止境</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
        <i class="fa fa-search fa-fw fa-lg"></i>
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="main-menu menu">
        <li class="menu-item menu-item-home">

    <a href="/myblog/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a>

  </li>
        <li class="menu-item menu-item-about">

    <a href="/myblog/about/" rel="section"><i class="fa fa-user fa-fw"></i>关于</a>

  </li>
        <li class="menu-item menu-item-tags">

    <a href="/myblog/tags/" rel="section"><i class="fa fa-tags fa-fw"></i>标签</a>

  </li>
        <li class="menu-item menu-item-categories">

    <a href="/myblog/categories/" rel="section"><i class="fa fa-th fa-fw"></i>分类</a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/myblog/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档</a>

  </li>
        <li class="menu-item menu-item-links">

    <a href="/myblog/links/" rel="section"><i class="fa fa-link fa-fw"></i>友链</a>

  </li>
      <li class="menu-item menu-item-search">
        <a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
        </a>
      </li>
  </ul>
</nav>



  <div class="search-pop-overlay">
    <div class="popup search-popup">
        <div class="search-header">
  <span class="search-icon">
    <i class="fa fa-search"></i>
  </span>
  <div class="search-input-container">
    <input autocomplete="off" autocapitalize="off"
           placeholder="搜索..." spellcheck="false"
           type="search" class="search-input">
  </div>
  <span class="popup-btn-close">
    <i class="fa fa-times-circle"></i>
  </span>
</div>
<div id="search-result">
  <div id="no-result">
    <i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>
  </div>
</div>

    </div>
  </div>

</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="https://gao-qianwei.gitee.io/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/myblog/images/avatar.png">
      <meta itemprop="name" content="高乾威">
      <meta itemprop="description" content="报我楼成秋望月，把君诗读夜回灯">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="学无止境">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          MapReduce环境配置与案例实操
        </h1>

        <div class="post-meta">

          
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-calendar"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>
              

              <time title="创建时间：2021-06-06 18:17:17 / 修改时间：18:21:43" itemprop="dateCreated datePublished" datetime="2021-06-06T18:17:17+08:00">2021-06-06</time>
            </span>
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-folder"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/myblog/categories/%E6%95%B0%E6%8D%AE%E7%A0%94%E5%8F%91/" itemprop="url" rel="index"><span itemprop="name">数据研发</span></a>
                </span>
                  ，
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/myblog/categories/%E6%95%B0%E6%8D%AE%E7%A0%94%E5%8F%91/Hadoop/" itemprop="url" rel="index"><span itemprop="name">Hadoop</span></a>
                </span>
                  ，
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/myblog/categories/%E6%95%B0%E6%8D%AE%E7%A0%94%E5%8F%91/Hadoop/MapReduce/" itemprop="url" rel="index"><span itemprop="name">MapReduce</span></a>
                </span>
            </span>

          
            <span class="post-meta-item" title="阅读次数" id="busuanzi_container_page_pv" style="display: none;">
              <span class="post-meta-item-icon">
                <i class="fa fa-eye"></i>
              </span>
              <span class="post-meta-item-text">阅读次数：</span>
              <span id="busuanzi_value_page_pv"></span>
            </span>
            <span class="post-meta-divider">|</span>

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <h3 id="一、WordCount-案例实操"><a href="#一、WordCount-案例实操" class="headerlink" title="一、WordCount 案例实操"></a>一、WordCount 案例实操</h3><h4 id="1）需求"><a href="#1）需求" class="headerlink" title="1）需求"></a>1）需求</h4><p>&ensp;&ensp;&ensp;&ensp;在给定的文本文件中统计输出每一个单词出现的总次数 </p>
<span id="more"></span>

<p>（1）输入数据 hello.txt </p>
<p>（2）期望输出数据 atguigu 2 banzhang 1 cls 2 hadoop 1 jiao 1 ss 2 xue 1 </p>
<h4 id="2）需求分析"><a href="#2）需求分析" class="headerlink" title="2）需求分析"></a>2）需求分析</h4><p>&ensp;&ensp;&ensp;&ensp;按照 MapReduce 编程规范，分别编写 Mapper，Reducer，Driver</p>
<img src="/myblog/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/1.png" alt="image-20210525010300155" style="zoom: 50%;">



<h3 id="二、环境准备"><a href="#二、环境准备" class="headerlink" title="二、环境准备"></a>二、环境准备</h3><p>（1）创建 maven 工程，MapReduceDemo </p>
<p>（2）在 pom.xml 文件中添加如下依赖</p>
<figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag">&lt;<span class="name">dependencies</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.apache.hadoop<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>hadoop-client<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">version</span>&gt;</span>3.1.3<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>junit<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>junit<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">version</span>&gt;</span>4.12<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.slf4j<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>slf4j-log4j12<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">         <span class="tag">&lt;<span class="name">version</span>&gt;</span>1.7.30<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">     <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;/<span class="name">dependencies</span>&gt;</span></span><br></pre></td></tr></table></figure>

<p>（3）在项目的 src/main/resources 目录下，新建一个文件，命名为“log4j.properties”，在 文件中填入。</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">log4j.rootLogger&#x3D;INFO, stdout </span><br><span class="line">log4j.appender.stdout&#x3D;org.apache.log4j.ConsoleAppender </span><br><span class="line">log4j.appender.stdout.layout&#x3D;org.apache.log4j.PatternLayout </span><br><span class="line">log4j.appender.stdout.layout.ConversionPattern&#x3D;%d %p [%c] - %m%n </span><br><span class="line">log4j.appender.logfile&#x3D;org.apache.log4j.FileAppender </span><br><span class="line">log4j.appender.logfile.File&#x3D;target&#x2F;spring.log </span><br><span class="line">log4j.appender.logfile.layout&#x3D;org.apache.log4j.PatternLayout </span><br><span class="line">log4j.appender.logfile.layout.ConversionPattern&#x3D;%d %p [%c] - %m%n</span><br></pre></td></tr></table></figure>

<p>（4）创建包名：com.james.mapreduce.wordcount</p>
<h3 id="三、编写程序运行"><a href="#三、编写程序运行" class="headerlink" title="三、编写程序运行"></a>三、编写程序运行</h3><p>（1）编写 Mapper 类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.atguigu.mapreduce.wordcount;</span><br><span class="line"><span class="keyword">import</span> java.io.IOException;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.IntWritable;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.LongWritable;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.Text;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.mapreduce.Mapper;</span><br><span class="line"><span class="keyword">public</span> <span class="class"><span class="keyword">class</span> <span class="title">WordCountMapper</span> <span class="keyword">extends</span> <span class="title">Mapper</span>&lt;<span class="title">LongWritable</span>, <span class="title">Text</span>, <span class="title">Text</span>, </span></span><br><span class="line"><span class="class"><span class="title">IntWritable</span>&gt;</span>&#123;</span><br><span class="line">    Text k = <span class="keyword">new</span> Text();</span><br><span class="line">    IntWritable v = <span class="keyword">new</span> IntWritable(<span class="number">1</span>);</span><br><span class="line">    <span class="meta">@Override</span></span><br><span class="line">    <span class="function"><span class="keyword">protected</span> <span class="keyword">void</span> <span class="title">map</span><span class="params">(LongWritable key, Text value, Context context)</span></span></span><br><span class="line"><span class="function">    <span class="keyword">throws</span> IOException, InterruptedException </span>&#123;</span><br><span class="line">        <span class="comment">// 1 获取一行</span></span><br><span class="line">        String line = value.toString();</span><br><span class="line">        <span class="comment">// 2 切割</span></span><br><span class="line">        String[] words = line.split(<span class="string">&quot; &quot;</span>);</span><br><span class="line">        <span class="comment">// 3 输出</span></span><br><span class="line">        <span class="keyword">for</span> (String word : words) &#123;</span><br><span class="line">            k.set(word);</span><br><span class="line">            context.write(k, v);	</span><br><span class="line">        &#125;</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br><span class="line"></span><br></pre></td></tr></table></figure>

<p>（2）编写 Reducer 类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.atguigu.mapreduce.wordcount;</span><br><span class="line"><span class="keyword">import</span> java.io.IOException;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.IntWritable;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.Text;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.mapreduce.Reducer;</span><br><span class="line"><span class="keyword">public</span> <span class="class"><span class="keyword">class</span> <span class="title">WordCountReducer</span> <span class="keyword">extends</span> <span class="title">Reducer</span>&lt;<span class="title">Text</span>, <span class="title">IntWritable</span>, <span class="title">Text</span>, </span></span><br><span class="line"><span class="class"><span class="title">IntWritable</span>&gt;</span>&#123;</span><br><span class="line">    <span class="keyword">int</span> sum;</span><br><span class="line">    IntWritable v = <span class="keyword">new</span> IntWritable();</span><br><span class="line">    <span class="meta">@Override</span></span><br><span class="line">    <span class="function"><span class="keyword">protected</span> <span class="keyword">void</span> <span class="title">reduce</span><span class="params">(Text key, Iterable&lt;IntWritable&gt; values,Context </span></span></span><br><span class="line"><span class="function"><span class="params">    context)</span> <span class="keyword">throws</span> IOException, InterruptedException </span>&#123;</span><br><span class="line">        <span class="comment">// 1 累加求和</span></span><br><span class="line">        sum = <span class="number">0</span>;</span><br><span class="line">        <span class="keyword">for</span> (IntWritable count : values) &#123;</span><br><span class="line">        	sum += count.get();</span><br><span class="line">        &#125;</span><br><span class="line">        <span class="comment">// 2 输出</span></span><br><span class="line">        v.set(sum);</span><br><span class="line">        context.write(key,v);</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>（3）编写 Driver 驱动类</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">package</span> com.atguigu.mapreduce.wordcount;</span><br><span class="line"><span class="keyword">import</span> java.io.IOException;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.conf.Configuration;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.fs.Path;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.IntWritable;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.io.Text;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.mapreduce.Job;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.mapreduce.lib.input.FileInputFormat;</span><br><span class="line"><span class="keyword">import</span> org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;</span><br><span class="line"><span class="keyword">public</span> <span class="class"><span class="keyword">class</span> <span class="title">WordCountDriver</span> </span>&#123;</span><br><span class="line">    <span class="function"><span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">main</span><span class="params">(String[] args)</span> <span class="keyword">throws</span> IOException, </span></span><br><span class="line"><span class="function">    ClassNotFoundException, InterruptedException </span>&#123;</span><br><span class="line">        <span class="comment">// 1 获取配置信息以及获取 job 对象</span></span><br><span class="line">        Configuration conf = <span class="keyword">new</span> Configuration();</span><br><span class="line">        Job job = Job.getInstance(conf);</span><br><span class="line">        </span><br><span class="line">        <span class="comment">// 2 关联本 Driver 程序的 jar</span></span><br><span class="line">        job.setJarByClass(WordCountDriver.class);</span><br><span class="line">       </span><br><span class="line">        <span class="comment">// 3 关联 Mapper 和 Reducer 的 jar</span></span><br><span class="line">        job.setMapperClass(WordCountMapper.class);</span><br><span class="line">        job.setReducerClass(WordCountReducer.class);</span><br><span class="line">       </span><br><span class="line">        <span class="comment">// 4 设置 Mapper 输出的 kv 类型</span></span><br><span class="line">        job.setMapOutputKeyClass(Text.class);</span><br><span class="line">        job.setMapOutputValueClass(IntWritable.class);</span><br><span class="line">       </span><br><span class="line">        <span class="comment">// 5 设置最终输出 kv 类型</span></span><br><span class="line">        job.setOutputKeyClass(Text.class);</span><br><span class="line">        job.setOutputValueClass(IntWritable.class);</span><br><span class="line">        </span><br><span class="line">        <span class="comment">// 6 设置输入和输出路径</span></span><br><span class="line">        FileInputFormat.setInputPaths(job, <span class="keyword">new</span> Path(args[<span class="number">0</span>]));</span><br><span class="line">        FileOutputFormat.setOutputPath(job, <span class="keyword">new</span> Path(args[<span class="number">1</span>]));</span><br><span class="line">        </span><br><span class="line">        <span class="comment">// 7 提交 job</span></span><br><span class="line">        <span class="keyword">boolean</span> result = job.waitForCompletion(<span class="keyword">true</span>);</span><br><span class="line">        System.exit(result ? <span class="number">0</span> : <span class="number">1</span>);</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<p>本地测试 </p>
<p>（1）需要首先配置好 HADOOP_HOME 变量以及 Windows 运行依赖 </p>
<p>（2）在 IDEA/Eclipse 上运行程序</p>
<h3 id="四、提交到集群测试"><a href="#四、提交到集群测试" class="headerlink" title="四、提交到集群测试"></a>四、提交到集群测试</h3><p>（1）用 maven 打 jar 包，需要添加的打包插件依赖</p>
<figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag">&lt;<span class="name">build</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;<span class="name">plugins</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">plugin</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>maven-compiler-plugin<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">version</span>&gt;</span>3.6.1<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">configuration</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;<span class="name">source</span>&gt;</span>1.8<span class="tag">&lt;/<span class="name">source</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;<span class="name">target</span>&gt;</span>1.8<span class="tag">&lt;/<span class="name">target</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;/<span class="name">configuration</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">plugin</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">plugin</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>maven-assembly-plugin<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">configuration</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;<span class="name">descriptorRefs</span>&gt;</span></span><br><span class="line">                    <span class="tag">&lt;<span class="name">descriptorRef</span>&gt;</span>jar-with-dependencies<span class="tag">&lt;/<span class="name">descriptorRef</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;/<span class="name">descriptorRefs</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;/<span class="name">configuration</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">executions</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;<span class="name">execution</span>&gt;</span></span><br><span class="line">                    <span class="tag">&lt;<span class="name">id</span>&gt;</span>make-assembly<span class="tag">&lt;/<span class="name">id</span>&gt;</span></span><br><span class="line">                    <span class="tag">&lt;<span class="name">phase</span>&gt;</span>package<span class="tag">&lt;/<span class="name">phase</span>&gt;</span></span><br><span class="line">                    <span class="tag">&lt;<span class="name">goals</span>&gt;</span></span><br><span class="line">                        <span class="tag">&lt;<span class="name">goal</span>&gt;</span>single<span class="tag">&lt;/<span class="name">goal</span>&gt;</span></span><br><span class="line">                    <span class="tag">&lt;/<span class="name">goals</span>&gt;</span></span><br><span class="line">                <span class="tag">&lt;/<span class="name">execution</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;/<span class="name">executions</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">plugin</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;/<span class="name">plugins</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;/<span class="name">build</span>&gt;</span></span><br></pre></td></tr></table></figure>

<p>注意：如果工程上显示红叉。在项目上右键-&gt;maven-&gt;Reimport 刷新即可。</p>
<p>（2）将程序打成 jar 包</p>
<p><img src="/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9A%E7%8E%AF%E5%A2%83%E9%85%8D%E7%BD%AE%E4%B8%8E%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D/2.png" alt="image-20210525011017523"></p>
<p>（3）修改不带依赖的 jar 包名称为 wc.jar，并拷贝该 jar 包到 Hadoop 集群的 /opt/module/hadoop-3.1.3 路径。 </p>
<p>（4）启动 Hadoop 集群</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">[atguigu@hadoop102 hadoop-3.1.3]sbin/start-dfs.sh</span><br><span class="line">[atguigu@hadoop103 hadoop-3.1.3]$ sbin/start-yarn.sh </span><br></pre></td></tr></table></figure>

<p>（5）执行 WordCount 程序</p>
<figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">[atguigu@hadoop102 hadoop-3.1.3]$ hadoop jar wc.jar com.atguigu.mapreduce.wordcount.WordCountDriver /user/atguigu/input  /user/atguigu/output</span><br></pre></td></tr></table></figure>


    </div>

    
    
    

      <footer class="post-footer">
          <div class="post-tags">
              <a href="/myblog/tags/%E6%95%B0%E6%8D%AE%E7%A0%94%E5%8F%91/" rel="tag"># 数据研发</a>
              <a href="/myblog/tags/Hadoop/" rel="tag"># Hadoop</a>
              <a href="/myblog/tags/MapReduce/" rel="tag"># MapReduce</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/myblog/2021/06/06/Hadoop%E4%B9%8BMapReduce%EF%BC%9Ashuffle%E6%9C%BA%E5%88%B6/" rel="prev" title="shuffle运行机制">
      <i class="fa fa-chevron-left"></i> shuffle运行机制
    </a></div>
      <div class="post-nav-item">
    <a href="/myblog/2021/06/06/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84%EF%BC%9ALRU%E7%BC%93%E5%AD%98/" rel="next" title="LRU缓存算法">
      LRU缓存算法 <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          

<script>
  window.addEventListener('tabs:register', () => {
    let { activeClass } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      activeClass = localStorage.getItem('comments_active') || activeClass;
    }
    if (activeClass) {
      let activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
      if (activeTab) {
        activeTab.click();
      }
    }
  });
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', event => {
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      let commentClass = event.target.classList[1];
      localStorage.setItem('comments_active', commentClass);
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%B8%80%E3%80%81WordCount-%E6%A1%88%E4%BE%8B%E5%AE%9E%E6%93%8D"><span class="nav-text">一、WordCount 案例实操</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#1%EF%BC%89%E9%9C%80%E6%B1%82"><span class="nav-text">1）需求</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#2%EF%BC%89%E9%9C%80%E6%B1%82%E5%88%86%E6%9E%90"><span class="nav-text">2）需求分析</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%BA%8C%E3%80%81%E7%8E%AF%E5%A2%83%E5%87%86%E5%A4%87"><span class="nav-text">二、环境准备</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%B8%89%E3%80%81%E7%BC%96%E5%86%99%E7%A8%8B%E5%BA%8F%E8%BF%90%E8%A1%8C"><span class="nav-text">三、编写程序运行</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%9B%9B%E3%80%81%E6%8F%90%E4%BA%A4%E5%88%B0%E9%9B%86%E7%BE%A4%E6%B5%8B%E8%AF%95"><span class="nav-text">四、提交到集群测试</span></a></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
    <img class="site-author-image" itemprop="image" alt="高乾威"
      src="/myblog/images/avatar.png">
  <p class="site-author-name" itemprop="name">高乾威</p>
  <div class="site-description" itemprop="description">报我楼成秋望月，把君诗读夜回灯</div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/myblog/archives/">
        
          <span class="site-state-item-count">91</span>
          <span class="site-state-item-name">日志</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
            <a href="/myblog/categories/">
          
        <span class="site-state-item-count">27</span>
        <span class="site-state-item-name">分类</span></a>
      </div>
      <div class="site-state-item site-state-tags">
            <a href="/myblog/tags/">
          
        <span class="site-state-item-count">59</span>
        <span class="site-state-item-name">标签</span></a>
      </div>
  </nav>
</div>
  <div class="links-of-author motion-element">
      <span class="links-of-author-item">
        <a href="http://github.com/GaoQianwei" title="GitHub → http:&#x2F;&#x2F;github.com&#x2F;GaoQianwei" rel="noopener" target="_blank">GitHub</a>
      </span>
      <span class="links-of-author-item">
        <a href="mailto:2966807184@qq.com" title="E-Mail → mailto:2966807184@qq.com" rel="noopener" target="_blank">E-Mail</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://www.zhihu.com/people/xue-wu-zhi-jing-75-22" title="知乎 → https:&#x2F;&#x2F;www.zhihu.com&#x2F;people&#x2F;xue-wu-zhi-jing-75-22" rel="noopener" target="_blank">知乎</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://blog.csdn.net/weixin_45341339" title="CSDN → https:&#x2F;&#x2F;blog.csdn.net&#x2F;weixin_45341339" rel="noopener" target="_blank">CSDN</a>
      </span>
  </div>



      </div><div>
  <canvas id="canvasDiyBlock" style="width:60%;">当前浏览器不支持canvas，请更换浏览器后再试</canvas>
<script src="/myblog/js/custom/clock.js"></script>

</div>


    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

        

<div class="copyright">
  
  &copy; 2021-04 到 
  <span itemprop="copyrightYear">2022</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">高乾威</span>
</div>
  <div class="powered-by">由 <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> & <a href="https://theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Gemini</a> 强力驱动
  </div>

<span class="post-meta-divider">|</span>

<div class="theme-info">
  <div class="powered-by"></div>
  <span class="post-count">全站共 117.2k 字</span>
</div>

<span class="post-meta-divider">|</span>
        
<div class="busuanzi-count">
  <script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>
</div>








      </div>
    </footer>
  </div>

  
  <script src="/myblog/lib/anime.min.js"></script>
  <script src="/myblog/lib/velocity/velocity.min.js"></script>
  <script src="/myblog/lib/velocity/velocity.ui.min.js"></script>

<script src="/myblog/js/utils.js"></script>

<script src="/myblog/js/motion.js"></script>


<script src="/myblog/js/schemes/pisces.js"></script>


<script src="/myblog/js/next-boot.js"></script>


  <script defer src="/myblog/lib/three/three.min.js"></script>
    <script defer src="/myblog/lib/three/canvas_lines.min.js"></script>


  




  
<script src="/myblog/js/local-search.js"></script>













  

  

</body>
</html>
